#### load data ####
# read the raw survey export first, then keep respondents aged 69 or younger
# (Q5.2 is an age code: age in years = Q5.2 + 17, so codes below 53 mean < 70;
# rows whose Q5.2 is missing are dropped by subset() as well)
original.data <- read.csv("ideological_label_conjoint_data.csv", 
                          fileEncoding = "utf8")
original.data <- subset(original.data, Q5.2 < 53)
# number of respondents kept
N <- nrow(original.data)

# densely inhabited district (DID) population ratio of each prefecture
prefecture.DID.ratio <- read.csv(file = "prefecture_DID_ratio.csv", 
                                 fileEncoding = "utf8")

#### respondent-level variables ####
# gender dummy (woman = 1)
gender <- original.data$Q5.1 - 1
# age in years (Q5.2 is an age code offset by 17)
age <- 17 + original.data$Q5.2
# education dummies built from the Q5.3 code
# high school or lower (codes below 3)
low.edu <- as.numeric(original.data$Q5.3 < 3)
# technical college/community college/vocational college (codes 3 to 5)
middle.edu <- as.numeric(original.data$Q5.3 > 2 & original.data$Q5.3 < 6)
# college or higher (codes above 5)
high.edu <- as.numeric(original.data$Q5.3 > 5)
# DID population ratio of respondents' prefecture of residence, as a proportion
# (Q5.4 is used as a row index into prefecture.DID.ratio; column 3 is divided by 100)
DID.ratio <- prefecture.DID.ratio[original.data$Q5.4, 3] / 100
# self-reported knowledge about left-right labels
knowledge <- original.data$Q2
# ideological self-identification
# columns 93 to 102 are weighted by 1, ..., 10 and summed within each respondent; 
# a sum of 0 (nothing recorded in any of the ten columns) is treated as missing
ideology <- rowSums(matrix(1:10, nrow = N, ncol = 10, byrow = TRUE) * 
                      original.data[, 93:102], 
                    na.rm = TRUE)
is.na(ideology) <- ideology == 0
# issue attitudes
# Article 9
article9.attitude <- original.data$Q1_1
# defense power
defense.attitude <- original.data$Q1_2
# revisionism
revisionism.attitude <- original.data$Q1_3
# women
women.attitude <- original.data$Q1_4
# gay marriage
gay.attitude <- original.data$Q1_5
# foreign workers
immigrant.attitude <- original.data$Q1_6
# economic growth
growth.attitude <- original.data$Q1_7
# progressive tax
tax.attitude <- original.data$Q1_8
# satisficing respondents (directed questions): 
# flagged unless Q1_9 was answered 5 and Q1_10 was answered 1
satisficing <- original.data$Q1_9 != 5 | original.data$Q1_10 != 1
# experimental condition
condition <- original.data$condition

#### respondent-level dataset ####
# one row per respondent, before any attention screening
respondent.raw.data <- data.frame(
  ID = 1:N, 
  gender = gender, 
  age = age, 
  low.edu = low.edu, 
  middle.edu = middle.edu, 
  high.edu = high.edu, 
  DID.ratio = DID.ratio, 
  knowledge = knowledge, 
  ideology = ideology, 
  article9.attitude = article9.attitude, 
  defense.attitude = defense.attitude, 
  revisionism.attitude = revisionism.attitude, 
  women.attitude = women.attitude, 
  gay.attitude = gay.attitude, 
  immigrant.attitude = immigrant.attitude, 
  growth.attitude = growth.attitude, 
  tax.attitude = tax.attitude, 
  satisficing = satisficing, 
  condition = condition
)
# exclude satisficers
# (subset() also drops rows whose satisficing flag is missing)
respondent.data <- subset(respondent.raw.data, !satisficing)

write.csv(respondent.data, file = "respondent_data.csv", row.names = FALSE)

#### task-level variables ####
# record the order in which the eight issues appeared for each respondent
# issue[i, p] = index (in issue.wording) of the statement stored in the p-th 
# issue column of respondent i; NA if the stored text matches no wording
issue.wording <- c("憲法9条を改正すべきである", "日本の防衛力を強化すべきである", 
                   "日本は戦前・戦中の出来事に関して近隣諸国に謝罪するのをやめるべきである", 
                   "専業主婦世帯の税制を見直し，女性の社会進出を促進すべきである", 
                   "男性同士，女性同士での結婚を法律で認めるべきである", 
                   "日本は外国人労働者を積極的に受け入れるべきである", 
                   "経済的格差の是正よりも，経済成長を優先すべきである", 
                   "裕福な人に対する課税を強化すべきである")

# the issue statements are stored in the odd-numbered columns 1, 3, ..., 15
issue.column <- seq(1, 15, 2)

issue <- matrix(NA_integer_, N, 8)
# match() is vectorised, so each column is translated for all respondents at 
# once instead of indexing the data frame one cell at a time
for (p in seq_along(issue.column)) {
  issue[, p] <- match(original.data[, issue.column[p]], issue.wording)
}

# record the positions of six candidates on the eight issues for each respondent
# (for now, the order of issues is different across respondents)
# position[i, p, k] = position of candidate k in the p-th displayed row for 
# respondent i, coded as the index in position.wording 
# (1 = agree, 2 = neither, 3 = disagree); NA if the stored text matches none
position.wording <- c("賛成", "賛成でも反対でもない", "反対")

# columns of original.data holding the eight positions of candidates 1, ..., 6
position.column <- list(seq(2, 16, 2), 17:24, 
                        seq(26, 40, 2), 41:48, 
                        seq(50, 64, 2), 65:72)

position <- array(NA_integer_, c(N, 8, 6))
for (k in seq_along(position.column)) {
  for (p in seq_len(8)) {
    # translate one column for all respondents at once
    position[, p, k] <- match(original.data[, position.column[[k]][p]], 
                              position.wording)
  }
}

# sort the order of issues
# profile[i, j, k] (j = 1, ..., 8) = position of candidate k on the j-th issue, 
# where j indexes the issues in the order of issue.wording
# the ninth column = whether the candidate was displayed in the right column of the conjoint table (right = 1)
profile <- array(NA, c(N, 9, 6))
# the display side does not depend on the respondent or the issue 
# (candidates 1, 3, 5 = 0; candidates 2, 4, 6 = 1), so fill the slice once
profile[, 9, ] <- rep(c(0, 1, 0, 1, 0, 1), each = N)
for (i in seq_len(N)) {
  # display.row[j] = row of respondent i's table in which issue j was shown
  display.row <- match(1:8, issue[i, ])
  # with eight slots, "every issue found" also rules out duplicated issues
  if (anyNA(display.row)) {
    stop("respondent ", i, 
         ": not every issue appears in the recorded issue order", 
         call. = FALSE)
  }
  # reorder all six candidates of this respondent in one step
  profile[i, 1:8, ] <- position[i, display.row, ]
}

# record which candidate was perceived to be further to the right for each respondent
# (i.e., for the "right"-label condition, choice = 1 if chosen; 
# for the "left"-label condition, choice = 1 if not chosen)
# columns 1, 3, 5 = first candidate of tasks 1, 2, 3; columns 2, 4, 6 = second
choice <- local({
  # condition 0 answered the Q4.x.1.1 items, all other respondents Q4.x.1.2
  first.item <- original.data$condition == 0
  # a missing condition cannot be assigned to either set of items: stop loudly
  stopifnot(length(first.item) == N, !anyNA(first.item))
  # answer to each task (1 = first candidate picked, 2 = second candidate picked)
  answer <- cbind(
    ifelse(first.item, original.data$Q4.1.1.1, original.data$Q4.1.1.2), 
    ifelse(first.item, original.data$Q4.2.1.1, original.data$Q4.2.1.2), 
    ifelse(first.item, original.data$Q4.3.1.1, original.data$Q4.3.1.2)
  )
  first.picked <- 2 - answer
  second.picked <- answer - 1
  # same condition flag for each of the three tasks
  first.item.by.task <- matrix(first.item, N, 3)
  out <- matrix(NA, N, 6)
  # condition 0: a candidate scores 1 when picked; otherwise: 1 when not picked
  out[, c(1, 3, 5)] <- ifelse(first.item.by.task, first.picked, second.picked)
  out[, c(2, 4, 6)] <- ifelse(first.item.by.task, second.picked, first.picked)
  out
})

# record where each candidate was located on a left-right scale for each respondent
# columns 1, 2 = the two candidates of task 1; 3, 4 = task 2; 5, 6 = task 3
rating <- local({
  # condition 0 answered the Q4.x.2.1 items, all other respondents Q4.x.2.2
  first.item <- original.data$condition == 0
  # a missing condition cannot be assigned to either set of items: stop loudly
  stopifnot(length(first.item) == N, !anyNA(first.item))
  cbind(
    ifelse(first.item, original.data$Q4.1.2.1_1, original.data$Q4.1.2.2_1), 
    ifelse(first.item, original.data$Q4.1.2.1_2, original.data$Q4.1.2.2_2), 
    ifelse(first.item, original.data$Q4.2.2.1_1, original.data$Q4.2.2.2_1), 
    ifelse(first.item, original.data$Q4.2.2.1_2, original.data$Q4.2.2.2_2), 
    ifelse(first.item, original.data$Q4.3.2.1_1, original.data$Q4.3.2.2_1), 
    ifelse(first.item, original.data$Q4.3.2.1_2, original.data$Q4.3.2.2_2)
  )
})

# check whether the response time was more than five seconds for each task
# time.record.matrix[i, k] is TRUE when the timing item of the task containing 
# candidate k exceeds 5 for respondent i (NA when that timing is missing)
time.record.matrix <- local({
  # condition 0 has its timings in Q4.x.1T_3, all other respondents in Q4.x.2T_3
  first.item <- original.data$condition == 0
  # a missing condition cannot be assigned to either set of items: stop loudly
  stopifnot(length(first.item) == N, !anyNA(first.item))
  over.five <- cbind(
    ifelse(first.item, original.data$Q4.1.1T_3, original.data$Q4.1.2T_3) > 5, 
    ifelse(first.item, original.data$Q4.2.1T_3, original.data$Q4.2.2T_3) > 5, 
    ifelse(first.item, original.data$Q4.3.1T_3, original.data$Q4.3.2T_3) > 5
  )
  # both candidates of a task share that task's timing
  over.five[, rep(1:3, each = 2), drop = FALSE]
})
# respondent-major order (six consecutive entries per respondent)
time.record.vector <- as.vector(t(time.record.matrix))

#### task-level dataset ####
position.label <- c("Agree", "Neither", "Disagree")

# one row per respondent-candidate pair, in respondent-major order 
# (six consecutive rows per respondent: three tasks x two candidates)
task.raw.data <- local({
  # repeat a respondent-level vector once for each of the six candidates
  per.candidate <- function(x) rep(x, each = 6)
  # flatten a respondent-by-candidate matrix in respondent-major order
  by.respondent <- function(m) as.vector(t(m))
  # candidate positions on issue j as a labelled factor; the levels are fixed 
  # to the codes 1, 2, 3 so the labels stay correct (and no error is raised) 
  # even if one of the codes never occurs in the data
  stance <- function(j) {
    factor(by.respondent(profile[, j, ]), levels = 1:3, labels = position.label)
  }
  data.frame(respondent.id = per.candidate(seq_len(N)),  # respondent ID
             # choice-based response
             choice = by.respondent(choice), 
             # rating-based response
             rating = by.respondent(rating), 
             # candidate positions
             a.article9 = stance(1), 
             b.defense = stance(2), 
             c.revisionism = stance(3), 
             d.women = stance(4), 
             e.gay = stance(5), 
             f.immigrant = stance(6), 
             g.growth = stance(7), 
             h.tax = stance(8), 
             # candidates' display position
             position.right = factor(by.respondent(profile[, 9, ])), 
             # experimental condition ("left"-label condition = 1)
             condition = per.candidate(condition), 
             # respondents' age divided by 10
             age.decimal = per.candidate(age / 10), 
             # dummy variable for high self-reported knowledge 
             # (1 when the knowledge code is below 3)
             high.know = per.candidate(ifelse(knowledge < 3, 1, 0)), 
             # respondent-level variables for balance checks
             gender = per.candidate(gender), 
             age = per.candidate(age), 
             low.edu = per.candidate(low.edu), 
             middle.edu = per.candidate(middle.edu), 
             high.edu = per.candidate(high.edu), 
             DID.ratio = per.candidate(DID.ratio), 
             ideology = per.candidate(ideology), 
             article9.attitude = per.candidate(article9.attitude), 
             defense.attitude = per.candidate(defense.attitude), 
             revisionism.attitude = per.candidate(revisionism.attitude), 
             women.attitude = per.candidate(women.attitude), 
             gay.attitude = per.candidate(gay.attitude), 
             immigrant.attitude = per.candidate(immigrant.attitude), 
             growth.attitude = per.candidate(growth.attitude), 
             tax.attitude = per.candidate(tax.attitude), 
             knowledge = per.candidate(knowledge), 
             # variables for detecting satisficing responses
             satisficing = per.candidate(satisficing), 
             time = time.record.vector)
})
# exclude satisficing responses: keep rows of non-satisficing respondents whose 
# task took more than five seconds (rows with a missing flag are dropped too)
task.data <- subset(task.raw.data, !satisficing & time)

write.csv(task.raw.data, file = "task_raw_data.csv", row.names = FALSE)
write.csv(task.data, file = "task_data.csv", row.names = FALSE)

#### number of observations ####
# respondents in the loaded data
N
# candidate-level responses before screening (six per respondent)
nrow(task.raw.data)  
# candidate-level observations kept after screening
nrow(task.data)
# share of responses kept after screening
round(nrow(task.data) / nrow(task.raw.data), digits = 3)
# respondents with at least one observation left in task.data 
# (i.e., after both the satisficing and the response-time filters)
attentive.N <- sum(!duplicated(task.data$respondent.id))
attentive.N
# share of respondents with at least one observation left
round(attentive.N / N, digits = 3)